# 将CASME II视频序列和标注文件转换为YOLOv8格式

要将CASME II数据集转换为YOLOv8可用的格式，我们需要完成以下几个关键步骤：从视频中提取帧、处理标注信息、转换为YOLO格式的标注文件，并组织成正确的目录结构。

## 1. 准备工作

### 1.1 安装必要库
```bash
# scikit-learn provides train_test_split, openpyxl lets pandas read the .xlsx
# annotation file, and ultralytics is needed for the YOLOv8 training step.
pip install opencv-python pandas numpy tqdm scikit-learn openpyxl ultralytics
```

### 1.2 目录结构准备
```
CASME2_YOLO/
├── images/
│   ├── train/
│   └── val/
└── labels/
    ├── train/
    └── val/
```

## 2. 完整转换脚本

```python
import os
import cv2
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

def convert_casme2_to_yolo(casme_root, output_dir, test_size=0.2):
    """
    Convert the CASME II dataset into a YOLOv8 detection dataset.

    For every row of the annotation spreadsheet the matching video is opened,
    each frame inside the annotated [OnsetFrame, OffsetFrame] interval is
    saved as a JPEG, and one YOLO label file is written next to it. The saved
    frames are then split into train/val subsets and a ``casme2.yaml`` dataset
    description is written to ``output_dir``.

    NOTE: CASME II ships no bounding boxes. The box written here is a fixed
    placeholder rectangle and must be replaced with real face / region
    detection before the labels are used for training.

    Args:
        casme_root: Root directory of the CASME II dataset. It must contain
            ``CASME2-coding-20190701.xlsx`` and ``RAW/subXX/<Filename>``.
        output_dir: Directory that receives ``images/``, ``labels/`` and
            ``casme2.yaml``.
        test_size: Fraction of the saved frames moved to the validation set.
    """
    default_class_id = 4  # unknown / missing emotions fall back to "others"
    emotion_map = {
        'happiness': 0,
        'surprise': 1,
        'disgust': 2,
        'repression': 3,
        'others': 4
    }

    # Create the output directory tree.
    for kind in ("images", "labels"):
        for subset in ("train", "val"):
            os.makedirs(os.path.join(output_dir, kind, subset), exist_ok=True)

    # Read the annotation spreadsheet; rows without the fields needed to
    # locate a clip cannot be processed and are dropped.
    annotation_path = os.path.join(casme_root, "CASME2-coding-20190701.xlsx")
    df = pd.read_excel(annotation_path)
    df = df.dropna(subset=['Subject', 'Filename', 'OnsetFrame', 'OffsetFrame'])

    # Placeholder box (normalized x_center, y_center, width, height).
    # Replace with real face detection / micro-expression region localisation.
    x_center = 0.5
    y_center = 0.4
    bbox_width = 0.3
    bbox_height = 0.2

    # (image path, label path, class id) of every saved frame, for the split.
    all_samples = []

    for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing videos"):
        # int() keeps the zero-padded folder name correct even when pandas
        # loaded the column as float.
        subject = int(row['Subject'])
        video_name = str(row['Filename'])
        onset = int(row['OnsetFrame'])
        offset = int(row['OffsetFrame'])
        # str() tolerates empty cells, which would otherwise break .lower().
        emotion = str(row['Estimated Emotion']).strip().lower()
        class_id = emotion_map.get(emotion, default_class_id)
        label_line = f"{class_id} {x_center} {y_center} {bbox_width} {bbox_height}\n"

        video_path = os.path.join(casme_root, "RAW", f"sub{subject:02}", video_name)
        clip_stem = f"sub{subject:02}_{video_name.split('.')[0]}"

        cap = cv2.VideoCapture(video_path)
        try:
            if not cap.isOpened():
                print(f"Warning: Could not open video {video_path}")
                continue

            # CASME II frame numbers start at 1. Reading stops at the offset
            # frame, so the rest of the clip is never decoded, and the loop
            # does not depend on the reported frame count.
            for frame_num in range(1, offset + 1):
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_num < onset:
                    continue

                base_name = f"{clip_stem}_{frame_num:04d}"
                img_path = os.path.join(output_dir, "images", "train", f"{base_name}.jpg")
                label_path = os.path.join(output_dir, "labels", "train", f"{base_name}.txt")

                # Skip the label when the image could not be written, so
                # images and labels stay paired.
                if not cv2.imwrite(img_path, frame):
                    print(f"Warning: Could not write image {img_path}")
                    continue

                with open(label_path, 'w', encoding='utf-8') as f:
                    f.write(label_line)

                all_samples.append((img_path, label_path, class_id))
        finally:
            # Release the capture on every path, including errors.
            cap.release()

    # Split into train and validation sets.
    # NOTE(review): this is a frame-level split, so neighbouring frames of the
    # same clip can land in both subsets; split by clip or subject instead if
    # an unbiased validation score is required.
    if len(all_samples) > 1:
        strata = [class_id for _, _, class_id in all_samples]
        try:
            _, val_samples = train_test_split(
                all_samples, test_size=test_size, random_state=42, stratify=strata
            )
        except ValueError:
            # Stratification fails when a class has too few frames; fall back
            # to a plain random split rather than aborting the conversion.
            print("Warning: Stratified split failed, using a random split")
            _, val_samples = train_test_split(
                all_samples, test_size=test_size, random_state=42
            )

        # Move validation files. Target paths are rebuilt with os.path.join so
        # this works with any path separator, and os.replace overwrites files
        # left over from an earlier run.
        for img_path, label_path, _ in val_samples:
            new_img_path = os.path.join(
                output_dir, "images", "val", os.path.basename(img_path)
            )
            new_label_path = os.path.join(
                output_dir, "labels", "val", os.path.basename(label_path)
            )
            os.replace(img_path, new_img_path)
            os.replace(label_path, new_label_path)

    # Create the dataset description file.
    yaml_content = f"""path: {os.path.abspath(output_dir)}
train: images/train
val: images/val

# 类别
names:
  0: happiness
  1: surprise
  2: disgust
  3: repression
  4: others
"""

    # The content holds non-ASCII text, so the encoding is fixed explicitly.
    with open(os.path.join(output_dir, "casme2.yaml"), 'w', encoding='utf-8') as f:
        f.write(yaml_content)

    print("转换完成！")

# Example usage
if __name__ == "__main__":
    dataset_root = "/path/to/CASME2"  # replace with your CASME II path
    yolo_output = "CASME2_YOLO"
    convert_casme2_to_yolo(casme_root=dataset_root, output_dir=yolo_output)
```

## 3. 关键问题解决方案

### 3.1 缺少边界框标注的问题
CASME II数据集不提供面部或微表情区域的边界框标注，解决方法有：

1. **使用面部检测器自动生成边界框**：
```python
# 使用OpenCV的DNN面部检测器
def detect_face(frame, conf_threshold=0.5, net=None):
    """
    Detect the most confident face in an image with OpenCV's DNN face detector.

    Args:
        frame: Image array as read by OpenCV.
        conf_threshold: Minimum confidence a detection must exceed.
        net: Optional preloaded ``cv2.dnn`` network. When omitted, the Caffe
            model is loaded once and reused on later calls.

    Returns:
        A NumPy array ``(x1, y1, x2, y2)`` in pixel coordinates, clipped to
        the image bounds, or ``None`` when no face passes the threshold.
    """
    if frame is None or frame.size == 0:
        return None

    if net is None:
        # Loading the model from disk is slow, so cache it on the function
        # instead of reloading it for every frame.
        net = getattr(detect_face, "_cached_net", None)
        if net is None:
            net = cv2.dnn.readNetFromCaffe(
                "deploy.prototxt",  # obtain from OpenCV or online
                "res10_300x300_ssd_iter_140000.caffemodel"  # pretrained weights
            )
            detect_face._cached_net = net

    h, w = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    net.setInput(blob)
    detections = net.forward()

    if detections.shape[2] == 0:
        return None

    # Column 2 holds the confidence; pick the strongest detection explicitly
    # rather than relying on the order of the network output.
    confidences = detections[0, 0, :, 2]
    best = int(np.argmax(confidences))
    if confidences[best] <= conf_threshold:
        return None  # no face detected

    # Columns 3..6 are normalized corners; scale them to pixels.
    box = detections[0, 0, best, 3:7] * np.array([w, h, w, h])
    # Clip so the box never extends beyond the image.
    box[[0, 2]] = np.clip(box[[0, 2]], 0, w)
    box[[1, 3]] = np.clip(box[[1, 3]], 0, h)
    return box  # (x1, y1, x2, y2)
```

2. **使用面部Landmark定位微表情区域**：
```python
# 使用dlib或MediaPipe获取面部关键点
# 然后根据关键点定义ROI区域
```

### 3.2 时间维度处理
微表情是时序现象，可以考虑：

1. **使用连续多帧作为输入**：
```python
from collections import deque

# Keep a sliding window of the most recent frames in the conversion script.
sequence_length = 5  # use sequences of 5 frames
# A bounded deque drops the oldest frame automatically once it is full.
frame_sequence = deque(maxlen=sequence_length)

# While processing a video, for every frame:
frame_sequence.append(frame)
if len(frame_sequence) == sequence_length:
    # The window is full (this includes the very first full window).
    # Use the middle frame as the representative frame and label it with
    # the micro-expression of the whole sequence.
    representative_frame = frame_sequence[sequence_length // 2]

2. **生成光流图作为额外输入**（下面的函数将光流渲染为三通道BGR图像）：
```python
def compute_optical_flow(prev_frame, current_frame):
    """
    Render the dense Farneback optical flow between two frames as an image.

    Both frames are converted from BGR to grayscale, the flow between them is
    computed, and the result is drawn in HSV (hue from the flow angle, full
    saturation, value from the normalized flow magnitude) before being
    converted back to BGR.

    Args:
        prev_frame: Earlier frame; the output has its shape and dtype.
        current_frame: Later frame.

    Returns:
        The BGR visualization of the flow.
    """
    gray_before, gray_after = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        for image in (prev_frame, current_frame)
    )

    # Positional Farneback settings after the (unused) initial flow:
    # pyr_scale, levels, winsize, iterations, poly_n, poly_sigma, flags.
    farneback_args = (0.5, 3, 15, 3, 5, 1.2, 0)
    motion = cv2.calcOpticalFlowFarneback(
        gray_before, gray_after, None, *farneback_args
    )

    magnitude, angle = cv2.cartToPolar(motion[..., 0], motion[..., 1])

    # Build the HSV canvas channel by channel.
    canvas = np.zeros_like(prev_frame)
    canvas[..., 0] = angle * 180 / np.pi / 2
    canvas[..., 1] = 255
    canvas[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)

    return cv2.cvtColor(canvas, cv2.COLOR_HSV2BGR)
```

## 4. 使用转换后的数据训练YOLOv8

```python
from ultralytics import YOLO

# Load the model (yolov8s, yolov8m, etc. can be used instead).
model = YOLO('yolov8n.pt')

# Training settings for the converted dataset, gathered in one place.
train_settings = dict(
    data='CASME2_YOLO/casme2.yaml',
    imgsz=640,
    epochs=100,
    batch=16,
    patience=20,
    device='0',  # use the GPU
    project='microexpression_detection',
    name='yolov8n_casme2',
)

# Train on the converted data.
results = model.train(**train_settings)
```

## 5. 注意事项

1. **数据量问题**：CASME II样本较少，建议使用数据增强或与其他微表情数据集合并
2. **类别不平衡**：某些微表情类别样本较少，训练时可使用加权损失
3. **评估指标**：微表情检测需要特殊评估指标，不能仅依赖常规目标检测指标
4. **时序信息**：考虑结合LSTM或3D CNN处理时序信息

通过以上方法，你可以将CASME II数据集转换为YOLOv8可用的格式，并训练一个微表情检测模型。需要注意，转换脚本中写入的边界框只是固定的占位值，训练前必须按第3.1节的方法替换为由面部检测或关键点得到的真实边界框；此外，实际应用中可能需要根据具体需求调整边界框生成策略和模型架构。